from lxml import etree
from lxml.html import fromstring

# Minimal sample HTML document: an <html> root carrying an ``id`` attribute,
# a <head> with a <title>, and a <body> with a single <h1>. It serves as the
# input string for the (currently commented-out) lxml parsing snippets below.
# The value deliberately begins and ends with a newline.
text = (
    "\n"
    "<html id = '11'>\n"
    "  <head>\n"
    "    <title>test</title>\n"
    "  </head>\n"
    "  <body>\n"
    "    <h1>page title</h1>\n"
    "  </body>\n"
    "</html>\n"
)

# tree = etree.HTML(text)
# print(dir(tree),type(tree))
# print(tree.tag,tree.attrib)

#
# tree = etree.XML(text)
# print(tree.tag,tree.attrib,tree.find('head'))

# tree = etree.fromstring(text,parser=etree.HTMLParser())
# print(tree.tag,tree.attrib,tree.find('head'))

# Parse an HTML file: etree.parse() returns an ElementTree, so call getroot() to get the root Element object.
# tree = etree.parse('ee.html',parser=etree.HTMLParser())
# tree = tree.getroot()
# print(tree.tag,tree.attrib,tree.find('head'))


